import os
import pytesseract
# Perform OCR on a single hard-coded image and print the recognized text.
# The path is a raw string so the Windows backslash is taken literally rather
# than starting the invalid escape sequence "\c", which recent Python versions
# warn about. The resulting path value is unchanged.
text = pytesseract.image_to_string(r'output_images\code-to-uppaal.pptx--page1.jpg', lang="chi_sim")
print(text)

def convert_jpg_to_text(img_path, lang="chi_sim"):
    """Run Tesseract OCR on one image and return the recognized text.

    Args:
        img_path: Path of the image file; handed to
            ``pytesseract.image_to_string`` unchanged.
        lang: Tesseract language code(s) used for recognition. Defaults to
            ``"chi_sim"`` so existing single-argument callers behave exactly
            as before.

    Returns:
        Whatever ``pytesseract.image_to_string`` returns for the image
        (the OCR text).
    """
    return pytesseract.image_to_string(img_path, lang=lang)

# Directory whose files are OCR'd, and the text file the results are written to.
PICS_PATH = r'C:\Users\houzh\Documents\WPSDrive\312506692\WPS云盘\研究生\历史文档汇总收集\PPT\imgs'
OUTPUT_INDEX_FILE = r'C:\Users\houzh\Documents\WPSDrive\312506692\WPS云盘\研究生\历史文档汇总收集\PPT\text-index.txt'

# List the directory BEFORE opening the output file: opening with 'w'
# truncates any existing index, so a missing/unreadable PICS_PATH should fail
# first and leave the previous index intact.
# - sorted(): os.listdir() returns entries in arbitrary order; sorting makes
#   the index reproducible (plain lexicographic order, so "page10" sorts
#   before "page2").
# - isfile(): sub-directories cannot be OCR'd and would abort the whole run.
img_names = sorted(
    name
    for name in os.listdir(PICS_PATH)
    if os.path.isfile(os.path.join(PICS_PATH, name))
)

with open(OUTPUT_INDEX_FILE, 'w', encoding='utf8') as f:
    for img_name in img_names:
        img_path = os.path.join(PICS_PATH, img_name)
        print("processing ", img_name)
        text = convert_jpg_to_text(img_path)
        # One entry per image: the file name without its last extension,
        # the OCR text, then a blank separator line.
        f.write(f'{img_name.rsplit(".", 1)[0]}\n{text}\n\n')
